Image Compression

We are going to illustrate Principal Component Analysis (PCA), a dimension reduction technique, using an image. Before that, please note that much of the code that follows was inspired by Aaron Schlegel’s blog post on SVD and PCA.

Read Image

You must read the data before trying to run the code on your own machine. To read the data, use the following code after setting your working directory. To set your working directory, modify the following call so that it points to the folder where the data file resides: setwd('c:/thatawesomeclass/')

# Attach the JPEG reader and decode pic.jpg (looked up relative to the
# current working directory). The result is indexed as a 3-D array later on.
library(jpeg)
pic <- readJPEG("pic.jpg")

Extract Color Matrices

# Slice the image array along its third dimension: one matrix per channel,
# taken in the order red, green, blue.
r <- pic[, , 1]
g <- pic[, , 2]
b <- pic[, , 3]

Principal Component Analysis (PCA)

Run PCA on each Color Matrix

# Fit an uncentered PCA to each channel matrix. FALSE is spelled out because
# the short alias F is an ordinary variable that can be reassigned.
pca_r <- prcomp(r, center = FALSE)
pca_g <- prcomp(g, center = FALSE)
pca_b <- prcomp(b, center = FALSE)

# Keep the three fits together, in R, G, B order, for the reconstruction code.
pca_rgb <- list(pca_r, pca_g, pca_b)

Variance Explained

The first few components explain the bulk of the variance.

# Per-channel share of the total squared standard deviation carried by each
# component. seq_along() is used instead of 1:length() so an empty sdev vector
# would give an empty index rather than c(1, 0).
varExplained <- data.frame(
  components = seq_along(pca_r$sdev),
  r = pca_r$sdev^2 / sum(pca_r$sdev^2),
  g = pca_g$sdev^2 / sum(pca_g$sdev^2),
  b = pca_b$sdev^2 / sum(pca_b$sdev^2)
)

# Append the running totals, i.e. the share explained by the first k components.
varExplained <- cbind(
  varExplained,
  cum_r = cumsum(varExplained$r),
  cum_g = cumsum(varExplained$g),
  cum_b = cumsum(varExplained$b)
)
#varExplained[1:100,]
library(ggplot2); library(dplyr); library(tidyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Plot the cumulative share per channel against the number of components,
# one facet per channel, with a dashed reference line at 99%.
varExplained %>%
  select(components, cum_r, cum_g, cum_b) %>%
  # pivot_longer() replaces the superseded gather(); the plot is unchanged.
  pivot_longer(
    cols = c(cum_r, cum_g, cum_b),
    names_to = "color",
    values_to = "cumulative_variance"
  ) %>%
  ggplot(aes(x = components, y = cumulative_variance, color = color)) +
  geom_point() +
  geom_hline(yintercept = 0.99, size = 0.6, linetype = "dashed") +
  # Colours are assigned in alphabetical key order: cum_b, cum_g, cum_r.
  scale_color_manual(values = c("blue", "green", "red")) +
  facet_grid(. ~ color) +
  # "none" replaces the deprecated guides(color = FALSE) form.
  guides(color = "none")

Construct Image

Reconstruct image from components

library(ggmap)
# Rebuild the image from its first `comp` principal components and plot it.
#
# comp: number of leading components to keep; a single number >= 1.
#
# Reads the globals `pca_rgb` (the per-channel prcomp fits) and `varExplained`
# (cumulative shares shown in the title). Returns the plot built by ggimage()
# with a title added and the axis decorations blanked.
compressImage <- function(comp) {
  # 1:comp would silently pick column 1 for comp = 0, so reject bad input here.
  stopifnot(is.numeric(comp), length(comp) == 1L, !is.na(comp), comp >= 1)
  idx <- seq_len(comp)

  # Cumulative share, in percent, reached with `comp` components per channel.
  at_comp <- varExplained$components == comp
  r_var <- round(varExplained[at_comp, "cum_r"], 2) * 100
  g_var <- round(varExplained[at_comp, "cum_g"], 2) * 100
  b_var <- round(varExplained[at_comp, "cum_b"], 2) * 100

  # scores %*% t(loadings) per channel, stacked into a rows x cols x 3 array.
  # drop = FALSE keeps both operands as matrices even when comp is 1.
  compressed_pic_pca <- sapply(pca_rgb, function(img) {
    img$x[, idx, drop = FALSE] %*% t(img$rotation[, idx, drop = FALSE])
  }, simplify = "array")

  ggimage(compressed_pic_pca, fullpage = FALSE) +
    ggtitle(paste0(comp, ' Components', ': ', r_var, '%R, ', g_var, '%G, ', b_var, '%B')) +
    theme(
      axis.title.x = element_blank(),
      axis.text.x = element_blank(),
      axis.ticks.x = element_blank(),
      axis.title.y = element_blank(),
      axis.text.y = element_blank(),
      axis.ticks.y = element_blank(),
      plot.title = element_text(size = 11)
    )
}

Original Image

# Show the unmodified image with the same title size and blanked axes that the
# reconstructed images use. FALSE is spelled out because F can be reassigned.
ggimage(pic, fullpage = FALSE) +
  ggtitle('Original Image') +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    plot.title = element_text(size = 11)
  )

Image with all Components

# varExplained has one row per component, so nrow() asks for all of them.
compressImage(nrow(varExplained))

Image with 500 components

# NOTE(review): 500 assumes the image yields at least 500 components - confirm
# before reusing this with a smaller picture.
compressImage(500)

Image with 100 components

compressImage(100)

Image with 50 components

compressImage(50)

Image with 10 components

compressImage(10)

Image with 9 components

compressImage(9)

Image with 8 components

compressImage(8)

Image with 7 components

compressImage(7)

Image with 6 components

compressImage(6)

Image with 5 components

compressImage(5)

Grid of Images

All components, 500, 100, 50, 10, 8, 7, 6, 5

library(ggmap); library(dplyr)
# Grid variant of compressImage(): rebuild the image from its first `comp`
# principal components and return an untitled, full-page ggimage() plot.
#
# comp: number of leading components to keep; a single number >= 1.
#
# Reads the global `pca_rgb` (the per-channel prcomp fits). The percentage
# lookups of the titled variant are omitted because no title is drawn here.
compressImage <- function(comp) {
  # 1:comp would silently pick column 1 for comp = 0, so reject bad input here.
  stopifnot(is.numeric(comp), length(comp) == 1L, !is.na(comp), comp >= 1)
  idx <- seq_len(comp)

  # scores %*% t(loadings) per channel, stacked into a rows x cols x 3 array.
  # drop = FALSE keeps both operands as matrices even when comp is 1.
  compressed_pic_pca <- sapply(pca_rgb, function(img) {
    img$x[, idx, drop = FALSE] %*% t(img$rotation[, idx, drop = FALSE])
  }, simplify = "array")

  ggimage(compressed_pic_pca, fullpage = TRUE)
}
library(gridExtra)
# Build the nine panels, from every component down to five, then lay them out
# three per row.
g1 <- compressImage(nrow(varExplained))
g2 <- compressImage(500)
g3 <- compressImage(100)
g4 <- compressImage(50)
g5 <- compressImage(10)
g6 <- compressImage(8)
g7 <- compressImage(7)
g8 <- compressImage(6)
g9 <- compressImage(5)
grid.arrange(grobs = list(g1, g2, g3, g4, g5, g6, g7, g8, g9), ncol = 3)

Singular Value Decomposition (SVD)

Run SVD on each Color Matrix

# Decompose each channel matrix, keeping the results in R, G, B order, and
# expose the individual decompositions under their own names as well.
svd_rgb <- list(svd(r), svd(g), svd(b))
svd_r <- svd_rgb[[1]]
svd_g <- svd_rgb[[2]]
svd_b <- svd_rgb[[3]]

Variance Explained

The first few components explain the bulk of the variance.

# Per-channel share of the total squared singular values carried by each
# component. seq_along() is used instead of 1:length() so an empty d vector
# would give an empty index rather than c(1, 0).
varExplained_svd <- data.frame(
  components = seq_along(svd_r$d),
  r = svd_r$d^2 / sum(svd_r$d^2),
  g = svd_g$d^2 / sum(svd_g$d^2),
  b = svd_b$d^2 / sum(svd_b$d^2)
)

# Append the running totals, i.e. the share explained by the first k components.
varExplained_svd <- cbind(
  varExplained_svd,
  cum_r = cumsum(varExplained_svd$r),
  cum_g = cumsum(varExplained_svd$g),
  cum_b = cumsum(varExplained_svd$b)
)
#varExplained_svd[1:100,]
library(ggplot2); library(dplyr); library(tidyr)
# Plot the cumulative share per channel against the number of components,
# one facet per channel, with a dashed reference line at 99%.
varExplained_svd %>%
  select(components, cum_r, cum_g, cum_b) %>%
  # pivot_longer() replaces the superseded gather(); the plot is unchanged.
  pivot_longer(
    cols = c(cum_r, cum_g, cum_b),
    names_to = "color",
    values_to = "cumulative_variance"
  ) %>%
  ggplot(aes(x = components, y = cumulative_variance, color = color)) +
  geom_point() +
  geom_hline(yintercept = 0.99, size = 0.6, linetype = "dashed") +
  # Colours are assigned in alphabetical key order: cum_b, cum_g, cum_r.
  scale_color_manual(values = c("blue", "green", "red")) +
  facet_grid(. ~ color) +
  # "none" replaces the deprecated guides(color = FALSE) form.
  guides(color = "none")

## Reconstruct Image
Construct Image from Components

library(ggmap)
# Rebuild the image from its first `comp` singular triplets and plot it.
#
# comp: number of leading components to keep; a single number >= 1.
#
# Reads the globals `svd_rgb` (the per-channel svd() results) and
# `varExplained_svd` (cumulative shares shown in the title). Returns the plot
# built by ggimage() with a title added and the axis decorations blanked.
compressImage_svd <- function(comp) {
  # 1:comp would silently pick column 1 for comp = 0, so reject bad input here.
  stopifnot(is.numeric(comp), length(comp) == 1L, !is.na(comp), comp >= 1)
  idx <- seq_len(comp)

  # Cumulative share, in percent, reached with `comp` components per channel.
  at_comp <- varExplained_svd$components == comp
  r_var <- round(varExplained_svd[at_comp, "cum_r"], 3) * 100
  g_var <- round(varExplained_svd[at_comp, "cum_g"], 3) * 100
  b_var <- round(varExplained_svd[at_comp, "cum_b"], 3) * 100

  # U_k %*% D_k %*% t(V_k) per channel, stacked into a rows x cols x 3 array.
  # diag() given a single number builds an identity matrix of that size, so
  # nrow is passed explicitly to get a 1 x 1 matrix when comp is 1;
  # drop = FALSE keeps U_k and V_k as matrices in that case too.
  compressed_pic_svd <- sapply(svd_rgb, function(img) {
    img$u[, idx, drop = FALSE] %*%
      diag(img$d[idx], nrow = length(idx)) %*%
      t(img$v[, idx, drop = FALSE])
  }, simplify = "array")

  ggimage(compressed_pic_svd, fullpage = FALSE) +
    ggtitle(paste0(comp, ' Components', ': ', r_var, '%R, ', g_var, '%G, ', b_var, '%B')) +
    theme(
      axis.title.x = element_blank(),
      axis.text.x = element_blank(),
      axis.ticks.x = element_blank(),
      axis.title.y = element_blank(),
      axis.text.y = element_blank(),
      axis.ticks.y = element_blank(),
      plot.title = element_text(size = 11)
    )
}

Image with all Components

# varExplained_svd has one row per singular value, so nrow() asks for all of
# them.
compressImage_svd(nrow(varExplained_svd))

Image with 500 components

# NOTE(review): 500 assumes the image yields at least 500 singular values -
# confirm before reusing this with a smaller picture.
compressImage_svd(500)

Image with 100 components

compressImage_svd(100)

Image with 50 components

compressImage_svd(50)

Image with 10 components

compressImage_svd(10)

Image with 9 components

compressImage_svd(9)

Image with 8 components

compressImage_svd(8)

Image with 7 components

compressImage_svd(7)

Image with 6 components

compressImage_svd(6)

Image with 5 components

compressImage_svd(5)

Grid of Images

All components, 500, 100, 50, 10, 8, 7, 6, 5

library(ggmap)
# Grid variant of compressImage_svd(): rebuild the image from its first `comp`
# singular triplets and return an untitled, full-page ggimage() plot.
#
# comp: number of leading components to keep; a single number >= 1.
#
# Reads the global `svd_rgb` (the per-channel svd() results). The percentage
# lookups of the titled variant are omitted because no title is drawn here.
compressImage_svd <- function(comp) {
  # 1:comp would silently pick column 1 for comp = 0, so reject bad input here.
  stopifnot(is.numeric(comp), length(comp) == 1L, !is.na(comp), comp >= 1)
  idx <- seq_len(comp)

  # U_k %*% D_k %*% t(V_k) per channel, stacked into a rows x cols x 3 array.
  # diag() given a single number builds an identity matrix of that size, so
  # nrow is passed explicitly to get a 1 x 1 matrix when comp is 1;
  # drop = FALSE keeps U_k and V_k as matrices in that case too.
  compressed_pic_svd <- sapply(svd_rgb, function(img) {
    img$u[, idx, drop = FALSE] %*%
      diag(img$d[idx], nrow = length(idx)) %*%
      t(img$v[, idx, drop = FALSE])
  }, simplify = "array")

  ggimage(compressed_pic_svd, fullpage = TRUE)
}
library(gridExtra)
# Build the nine SVD panels, from every component down to five, then lay them
# out three per row. The "all components" count is taken from the SVD table
# (varExplained_svd), not from the PCA table.
g1 <- compressImage_svd(nrow(varExplained_svd))
g2 <- compressImage_svd(500)
g3 <- compressImage_svd(100)
g4 <- compressImage_svd(50)
g5 <- compressImage_svd(10)
g6 <- compressImage_svd(8)
g7 <- compressImage_svd(7)
g8 <- compressImage_svd(6)
g9 <- compressImage_svd(5)
grid.arrange(g1, g2, g3, g4, g5, g6, g7, g8, g9, ncol = 3)


This file was generated using R Version 4.1.2